This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# general visualisation
library('ggplot2') # visualisation
library('scales') # visualisation
library('patchwork') # visualisation
library('RColorBrewer') # visualisation
library('corrplot') # visualisation
## corrplot 0.84 loaded
# general data manipulation
library('dplyr') # data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library('readr') # input/output
##
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
##
## col_factor
library('vroom') # input/output
library('skimr') # overview
library('tibble') # data wrangling
library('tidyr') # data wrangling
library('purrr') # data wrangling
##
## Attaching package: 'purrr'
## The following object is masked from 'package:scales':
##
## discard
library('stringr') # string manipulation
library('forcats') # factor manipulation
# specific visualisation
library('alluvial') # visualisation
library('ggrepel') # visualisation
library('ggforce') # visualisation
library('ggridges') # visualisation
library('gganimate') # animations
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
library('GGally') # visualisation
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
library('ggthemes') # visualisation
library('wesanderson') # visualisation
library('kableExtra') # display
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Date + forecast
library('lubridate') # date and time
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:dplyr':
##
## intersect, setdiff, union
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library('forecast') # time series analysis
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
#library('prophet') # time series analysis
library('timetk') # time series analysis
# Interactivity
library('crosstalk')
library('plotly')
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# parallel
library('foreach')
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
library('doParallel')
## Loading required package: iterators
## Loading required package: parallel
library(vroom)
library(stringr)
library(tidyverse)
train <- vroom(str_c('/Coursework/Timeseries/Timeseries_project/CSV files/sales_train_validation.csv'), delim = ",", col_types = cols())
prices <- vroom(str_c('/Coursework/Timeseries/Timeseries_project/CSV files/sell_prices.csv'), delim = ",", col_types = cols())
calendar <- read_csv(str_c('/Coursework/Timeseries/Timeseries_project/CSV files/calendar.csv'), col_types = cols())
sample_submit <- vroom(str_c('/Coursework/Timeseries/Timeseries_project/CSV files/sample_submission.csv'), delim = ",", col_types = cols())
extract_ts <- function(df){
min_date <- as.Date("2011-01-29")
df %>%
select(id, starts_with("d_")) %>%
pivot_longer(starts_with("d_"), names_to = "dates", values_to = "sales") %>%
mutate(dates = as.integer(str_remove(dates, "d_"))) %>%
mutate(dates = min_date + dates - 1) %>%
mutate(id = str_remove(id, "_validation"))
}
set.seed(4321)
foo <- train %>%
sample_n(50)
ts_out <- extract_ts(foo)
cols <- ts_out %>%
distinct(id) %>%
mutate(cols = rep_len(brewer.pal(7, "Set2"), length.out = n_distinct(ts_out$id)))
ts_out <- ts_out %>%
left_join(cols, by = "id")
pal <- cols$cols %>%
setNames(cols$id)
shared_ts <- highlight_key(ts_out)
palette(brewer.pal(9, "Set3"))
gg <- shared_ts %>%
ggplot(aes(dates, sales, col = id, group = id)) +
geom_line() +
scale_color_manual(values = pal) +
labs(x = "Date", y = "Sales") +
theme_tufte() +
NULL
filter <- bscols(
filter_select("ids", "Sales over time: Select a time series ID (remove with backspace key, navigate with arrow keys):", shared_ts, ~id, multiple = TRUE),
ggplotly(gg, dynamicTicks = TRUE),
widths = c(12, 12)
)
## Warning in bscols(filter_select("ids", "Sales over time: Select a time series
## ID (remove with backspace key, navigate with arrow keys):", : Sum of bscol width
## units is greater than 12
bscols(filter)